import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison
import pingouin as pg
from matplotlib.backends.backend_pdf import PdfPages

# Load the data
df = pd.read_csv(r"D:\RENJIN RAJU\MASTERS\SEMESTER 3\THESIS\Research\Analysis\Survey data cleaned.csv",
                 encoding='latin-1')


# Data Cleaning Functions
def clean_yes_no(value):
    if pd.isna(value):
        return np.nan
    value = str(value).strip().lower()
    if value in ['yes', 'y', '1', 'true']:
        return 1
    elif value in ['no', 'n', '0', 'false']:
        return 0
    return np.nan


def clean_implementation_status(value):
    if pd.isna(value):
        return np.nan
    if isinstance(value, (int, float)):
        return float(value)
    try:
        return float(str(value).split('_')[0])
    except:
        return np.nan


# Data Cleaning
def clean_data(df):
    df_clean = df.copy()

    # Clean disruptions
    disruption_cols = ['Disruption_Natural_Disasters', 'Disruption_Supplier_Failures',
                       'Disruption_Cybersecurity_Threats', 'Disruption_Global_Crises',
                       'Disruption_Transportation_Delays']
    for col in disruption_cols:
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].apply(clean_yes_no)
    df_clean['disruption_score'] = df_clean[disruption_cols].sum(axis=1)

    # Clean technology adoption
    tech_cols = ['adoption_Artificial_Intelligence_AI', 'adoption_Internet_of_Things_IoT',
                 'adoption_Blockchain', 'adoption_Cloud_Computing', 'adoption_Robotic_and_Automation',
                 'adoption_Big_Data_Analytics', 'adoption_Simulation', 'adoption_Other']
    for col in tech_cols:
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].apply(clean_yes_no)
    df_clean['adoption_score'] = df_clean[tech_cols].sum(axis=1)

    # Clean implementation status
    imp_cols = ['implemented_AI', 'implemented_IoT', 'implemented_Blockchain',
                'implemented_Cloud_Computing', 'implemented_Robotics_and_Automation',
                'implemented_Big_Data_Analytics']
    for col in imp_cols:
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].apply(clean_implementation_status)
    df_clean['implementation_score'] = df_clean[imp_cols].mean(axis=1)

    # Clean barriers
    barrier_cols = ['barriers_High_Implementation_Costs', 'barriers_Lack_of_Skilled_Workforce',
                    'barriers_Integration_Issues', 'barriers_Resistance_to_Change']
    for col in barrier_cols:
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].apply(clean_yes_no)

    # Clean collaboration
    collab_cols = ['resilience_collaboration_Cloud-based_platforms',
                   'resilience_collaboration_Blockchain_for_transparency',
                   'resilience_collaboration_AI_powered_predictive_analytics',
                   'resilience_collaboration_IoT_for_real-time_tracking',
                   'resilience_collaboration_Digital_twin_Simulations']
    for col in collab_cols:
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].apply(clean_yes_no)
    df_clean['collaboration_score'] = df_clean[collab_cols].sum(axis=1)

    # Size categories
    df_clean['size'] = pd.to_numeric(df_clean['size'], errors='coerce')
    df_clean['size_category'] = pd.cut(df_clean['size'],
                                       bins=[0, 250, 750, np.inf],
                                       labels=['SME', 'Medium', 'Large'])

    # Resilience score
    response_cols = ['response_Increased_Inventory_Buffers', 'response_Diversified_Suppliers',
                     'response_Implemented_New_Technologies', 'response_Other']
    for col in response_cols:
        if col in df_clean.columns:
            df_clean[col] = df_clean[col].apply(clean_yes_no)
    df_clean['response_score'] = df_clean[response_cols].sum(axis=1)
    df_clean['resilience_score'] = (df_clean['response_score'] + df_clean['collaboration_score']) / 2

    return df_clean, tech_cols


# Clean the data
df_clean, tech_cols = clean_data(df)

def print_header(title):
    print("\n" + "=" * 80)
    print(title.upper())
    print("=" * 80)
# Non-parametric alternatives to tests

def test_h1a(df_clean):
    """H1a: Disruptions and Technology Adoption (Non-parametric version)"""
    print_header("H1a: Disruptions and Technology Adoption")

    # Mann-Whitney U test for disruption score and adoption score (non-parametric alternative to Pearson correlation)
    disruption = df_clean['disruption_score'].dropna()
    adoption = df_clean['adoption_score'].dropna()

    u_stat, p_val = stats.mannwhitneyu(disruption, adoption)
    print(f"Mann-Whitney U test between disruption score and adoption score: U={u_stat:.3f}, p-value: {p_val:.4f}")

    if p_val < 0.05:
        print("Conclusion: Reject null hypothesis - Significant relationship exists.")
    else:
        print("Conclusion: Fail to reject null hypothesis - No significant relationship found.")

    plt.figure(figsize=(8, 6))
    plt.boxplot([disruption, adoption], labels=['Disruption Score', 'Adoption Score'])
    plt.title('Boxplot: Disruption Score vs Adoption Score')
    plt.ylabel('Score')
    plt.show()

    # Alternatively, plotting a violin plot to visualize distribution
    plt.figure(figsize=(8, 6))
    plt.violinplot([disruption, adoption], showmeans=True)
    plt.xticks([1, 2], ['Disruption Score', 'Adoption Score'])
    plt.title('Violin Plot: Disruption Score vs Adoption Score')
    plt.ylabel('Score')
    plt.show()


def test_h1b(df_clean, tech_cols):
    """H1b: The impact of disruptions significantly influences the choice of resilience-enhancing technologies."""
    print_header("H1b: Disruption Impact on Technology Choice")

    print("Chi-square and Fisher's Exact Test results for disruption-technology relationships:")
    results = []

    for tech in tech_cols:
        # Create contingency table
        contingency = pd.crosstab(df_clean['disruption_score'], df_clean[tech])

        # Check if the contingency table is 2x2 for Fisher's Exact Test
        if contingency.shape == (2, 2):
            # Perform Fisher's Exact test for 2x2 tables
            odds_ratio, p_value = stats.fisher_exact(contingency)
            test_type = "Fisher's Exact Test"
        else:
            # Perform Chi-square test for larger contingency tables
            chi2, p_value, dof, expected = stats.chi2_contingency(contingency)
            odds_ratio = None  # Not relevant for Chi-square test
            test_type = "Chi-square Test"

        results.append({
            'Technology': tech.replace('adoption_', '').replace('_', ' '),
            'Test Type': test_type,
            'Odds Ratio (if 2x2)': odds_ratio,
            'p-value': p_value
        })

    results_df = pd.DataFrame(results).sort_values('p-value')
    print(results_df[['Technology', 'Test Type', 'Odds Ratio (if 2x2)', 'p-value']].head(10).to_string(index=False))

    for tech in tech_cols:
        contingency = pd.crosstab(df_clean['disruption_score'], df_clean[tech])

        # Create a heatmap of the contingency table
        plt.figure(figsize=(6, 5))
        plt.title(f"Contingency Table Heatmap for Disruption vs {tech.replace('adoption_', '').replace('_', ' ')}")
        sns.heatmap(contingency, annot=True, fmt='d', cmap='Blues', cbar=False)
        plt.xlabel('Technology Adoption')
        plt.ylabel('Disruption Score')
        plt.show()

        # Bar plot for p-values and odds ratios (if applicable)
    p_values = results_df['p-value']
    odds_ratios = results_df['Odds Ratio (if 2x2)'].fillna(0)  # Replace NaN with 0 for non-2x2 cases
    technologies = results_df['Technology']

    plt.figure(figsize=(10, 6))
    plt.bar(technologies, p_values, color='lightblue', label='p-value')
    plt.ylabel('p-value')
    plt.xticks(rotation=90)
    plt.title('P-values for Disruption-Technology Relationships')
    plt.tight_layout()
    plt.show()

    # For Odds Ratios, if available (only for 2x2 tables)
    plt.figure(figsize=(10, 6))
    plt.bar(technologies, odds_ratios, color='lightcoral', label='Odds Ratio')
    plt.ylabel('Odds Ratio')
    plt.xticks(rotation=90)
    plt.title('Odds Ratios for Disruption-Technology Relationships')
    plt.tight_layout()
    plt.show()


def categorize_size(row):
    if row == 50:
        return 'SME'
    elif row == 500:
        return 'Medium'
    elif row == 750:
        return 'Large'
    return 'Unknown'

# Apply the categorization function to the 'size' column
df_clean['size_category'] = df_clean['size'].apply(categorize_size)

# Step 2: Perform the Kruskal-Wallis test for adoption score by size category
def test_h2a(df_clean):
    """H2a: Technology Adoption by Company Size (Non-parametric version)"""
    print_header("H2a: Technology Adoption by Company Size")

    # Kruskal-Wallis test for adoption score by size category (non-parametric alternative to ANOVA)
    stat, p_val = stats.kruskal(
        df_clean[df_clean['size_category'] == 'SME']['adoption_score'].dropna(),
        df_clean[df_clean['size_category'] == 'Medium']['adoption_score'].dropna(),
        df_clean[df_clean['size_category'] == 'Large']['adoption_score'].dropna()
    )

    print(f"Kruskal-Wallis H test for adoption_score by size_category: H={stat:.2f}, p={p_val:.4f}")

    if p_val < 0.05:
        print("Conclusion: Significant differences in adoption by company size.")
    else:
        print("Conclusion: No significant differences in adoption by company size.")

    plt.figure(figsize=(8, 6))
    df_clean.boxplot(column='adoption_score', by='size_category', showmeans=True)
    plt.title('Boxplot: Technology Adoption by Company Size')
    plt.suptitle('')  # Remove default title generated by boxplot
    plt.xlabel('Company Size')
    plt.ylabel('Adoption Score')
    plt.show()

    # Violin plot for adoption score by company size (alternative to boxplot)
    plt.figure(figsize=(8, 6))
    sns.violinplot(x='size_category', y='adoption_score', data=df_clean, inner="quart")
    plt.title('Violin Plot: Technology Adoption by Company Size')
    plt.xlabel('Company Size')
    plt.ylabel('Adoption Score')
    plt.show()


def test_h2b(df_clean):
    """H2b: Technology Adoption in Disrupted Industries (Non-parametric version)"""
    print_header("H2b: Technology Adoption in Disrupted Industries")

    # Create a 'high_disruption_industry' column based on disruption score
    # Calculate the mean disruption score for each industry
    industry_disruption = df_clean.groupby('Industry')['disruption_score'].mean()
    top_disrupted = industry_disruption[industry_disruption > industry_disruption.median()].index.tolist()

    # Mark industries as highly disrupted (True) or not (False)
    df_clean['high_disruption_industry'] = df_clean['Industry'].isin(top_disrupted)

    print("Fisher’s Exact Test results for disruption-technology relationships:")
    results = []
    for tech in ['adoption_Artificial_Intelligence_AI', 'adoption_Internet_of_Things_IoT',
                 'adoption_Blockchain', 'adoption_Cloud_Computing', 'adoption_Robotic_and_Automation',
                 'adoption_Big_Data_Analytics', 'adoption_Simulation', 'adoption_Other']:
        if tech in df_clean.columns:
            # Create contingency table for high disruption vs technology adoption
            contingency = pd.crosstab(df_clean['high_disruption_industry'], df_clean[tech])

            # Perform Fisher’s Exact test
            odds_ratio, p_value = stats.fisher_exact(contingency)
            results.append({
                'Technology': tech.replace('adoption_', '').replace('_', ' '),
                'Odds Ratio': odds_ratio,
                'p-value': p_value
            })

            print(
                f"{tech.replace('adoption_', '').replace('_', ' ')}: Odds Ratio = {odds_ratio:.2f}, p-value = {p_value:.4f}")

    for tech in ['adoption_Artificial_Intelligence_AI', 'adoption_Internet_of_Things_IoT',
                 'adoption_Blockchain', 'adoption_Cloud_Computing', 'adoption_Robotic_and_Automation',
                 'adoption_Big_Data_Analytics', 'adoption_Simulation', 'adoption_Other']:
        if tech in df_clean.columns:
            contingency = pd.crosstab(df_clean['high_disruption_industry'], df_clean[tech])

            # Plotting stacked bar chart for disruption vs technology adoption
            contingency.plot(kind='bar', stacked=True, figsize=(8, 6), color=['lightblue', 'lightcoral'])
            plt.title(f"Stacked Bar Chart: Disruption vs {tech.replace('adoption_', '').replace('_', ' ')} Adoption")
            plt.xlabel('High Disruption Industry')
            plt.ylabel('Count')
            plt.xticks(rotation=0)
            plt.legend(title='Technology Adoption', labels=['Non-Adopters', 'Adopters'])
            plt.show()

        # Bar plot for Odds Ratios and p-values for each technology
    results_df = pd.DataFrame(results)

    # Plot Odds Ratios
    plt.figure(figsize=(8, 6))
    plt.bar(results_df['Technology'], results_df['Odds Ratio'], color='lightcoral')
    plt.ylabel('Odds Ratio')
    plt.title('Odds Ratios for Disruption-Technology Relationships')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Plot p-values
    plt.figure(figsize=(8, 6))
    plt.bar(results_df['Technology'], results_df['p-value'], color='lightblue')
    plt.ylabel('p-value')
    plt.title('P-values for Disruption-Technology Relationships')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def test_h3a(df_clean, tech_cols):
    """H3a: Technology Impact on Resilience (Non-parametric version)"""
    print_header("H3a: Technology Impact on Resilience")

    # Mann-Whitney U test for AI adopters vs non-adopters
    ai_adopters = df_clean[df_clean['adoption_Artificial_Intelligence_AI'] == 1]
    ai_non_adopters = df_clean[df_clean['adoption_Artificial_Intelligence_AI'] == 0]

    # Mann-Whitney U test for IoT adopters vs non-adopters
    iot_adopters = df_clean[df_clean['adoption_Internet_of_Things_IoT'] == 1]
    iot_non_adopters = df_clean[df_clean['adoption_Internet_of_Things_IoT'] == 0]

    # Mann-Whitney U test for AI
    ai_u_stat, ai_p_val = stats.mannwhitneyu(ai_adopters['resilience_score'].dropna(),
                                             ai_non_adopters['resilience_score'].dropna())
    print(f"Mann-Whitney U test between AI adopters and non-adopters: U={ai_u_stat:.3f}, p-value={ai_p_val:.4f}")

    # Mann-Whitney U test for IoT
    iot_u_stat, iot_p_val = stats.mannwhitneyu(iot_adopters['resilience_score'].dropna(),
                                               iot_non_adopters['resilience_score'].dropna())
    print(f"Mann-Whitney U test between IoT adopters and non-adopters: U={iot_u_stat:.3f}, p-value={iot_p_val:.4f}")

    # Print conclusions based on p-values
    if ai_p_val < 0.05:
        print("Conclusion: Significant difference in resilience between AI adopters and non-adopters.")
    else:
        print("Conclusion: No significant difference in resilience between AI adopters and non-adopters.")

    if iot_p_val < 0.05:
        print("Conclusion: Significant difference in resilience between IoT adopters and non-adopters.")
    else:
        print("Conclusion: No significant difference in resilience between IoT adopters and non-adopters.")

    # Visual Representation

    # Boxplot for AI adoption vs non-adopters
    plt.figure(figsize=(8, 6))
    df_clean.boxplot(column='resilience_score', by='adoption_Artificial_Intelligence_AI', showmeans=True)
    plt.title('Boxplot: Resilience Scores by AI Adoption')
    plt.suptitle('')  # Remove default title generated by boxplot
    plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Resilience Score')
    plt.show()

    # Violin plot for AI adoption vs non-adopters (alternative to boxplot)
    plt.figure(figsize=(8, 6))
    sns.violinplot(x='adoption_Artificial_Intelligence_AI', y='resilience_score', data=df_clean, inner="quart")
    plt.title('Violin Plot: Resilience Scores by AI Adoption')
    plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Resilience Score')
    plt.show()

    # Boxplot for IoT adoption vs non-adopters
    plt.figure(figsize=(8, 6))
    df_clean.boxplot(column='resilience_score', by='adoption_Internet_of_Things_IoT', showmeans=True)
    plt.title('Boxplot: Resilience Scores by IoT Adoption')
    plt.suptitle('')  # Remove default title generated by boxplot
    plt.xlabel('IoT Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Resilience Score')
    plt.show()

    # Violin plot for IoT adoption vs non-adopters (alternative to boxplot)
    plt.figure(figsize=(8, 6))
    sns.violinplot(x='adoption_Internet_of_Things_IoT', y='resilience_score', data=df_clean, inner="quart")
    plt.title('Violin Plot: Resilience Scores by IoT Adoption')
    plt.xlabel('IoT Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Resilience Score')
    plt.show()


def test_h3b(df_clean, tech_cols):
    """H3b: Combined Technology Impact on Resilience (Non-parametric version)"""
    print_header("H3b: Combined Technology Impact on Resilience")

    # Create tech_count and tech_group for combined technology adoption
    df_clean['tech_count'] = df_clean[tech_cols].sum(axis=1)
    df_clean['tech_group'] = pd.cut(df_clean['tech_count'], bins=[0, 1, 3, len(tech_cols)], labels=['0-1', '2-3', '4+'])

    # Kruskal-Wallis test for resilience score by tech group (0-1, 2-3, 4+ technologies)
    stat, p_val = stats.kruskal(
        df_clean[df_clean['tech_group'] == '0-1']['resilience_score'].dropna(),
        df_clean[df_clean['tech_group'] == '2-3']['resilience_score'].dropna(),
        df_clean[df_clean['tech_group'] == '4+']['resilience_score'].dropna()
    )
    print(f"Kruskal-Wallis H test for resilience score by tech group: H={stat:.2f}, p={p_val:.4f}")

    # Perform pairwise Mann-Whitney U tests between the groups
    tech_groups = ['0-1', '2-3', '4+']
    for i in range(len(tech_groups)):
        for j in range(i + 1, len(tech_groups)):
            group1 = tech_groups[i]
            group2 = tech_groups[j]
            group1_data = df_clean[df_clean['tech_group'] == group1]['resilience_score'].dropna()
            group2_data = df_clean[df_clean['tech_group'] == group2]['resilience_score'].dropna()

            u_stat, pairwise_p_val = stats.mannwhitneyu(group1_data, group2_data)
            print(f"Mann-Whitney U test between {group1} and {group2}: U={u_stat:.3f}, p-value={pairwise_p_val:.4f}")

    # Visual Representation: Boxplot for resilience scores by technology group
    plt.figure(figsize=(8, 6))
    df_clean.boxplot(column='resilience_score', by='tech_group', showmeans=True)
    plt.title('Boxplot: Resilience Scores by Technology Group')
    plt.suptitle('')  # Remove default title generated by boxplot
    plt.xlabel('Technology Adoption Group')
    plt.ylabel('Resilience Score')
    plt.show()

    # Violin plot for resilience scores by technology group (alternative to boxplot)
    plt.figure(figsize=(8, 6))
    sns.violinplot(x='tech_group', y='resilience_score', data=df_clean, inner="quart")
    plt.title('Violin Plot: Resilience Scores by Technology Group')
    plt.xlabel('Technology Adoption Group')
    plt.ylabel('Resilience Score')
    plt.show()

    # Print detailed results for each technology group
    print("\nResilience Score Statistics by Technology Group:")
    for group in ['0-1', '2-3', '4+']:
        group_data = df_clean[df_clean['tech_group'] == group]['resilience_score'].dropna()
        print(f"Group {group}:")
        print(f"  Count: {len(group_data)}")
        print(f"  Mean: {group_data.mean():.2f}")
        print(f"  Median: {group_data.median():.2f}")
        print(f"  Standard Deviation: {group_data.std():.2f}")
        print("-" * 50)


def test_h4a(df_clean):
    """H4a: Barriers to Technology Adoption (Non-parametric version)"""
    print_header("H4a: Barriers to Technology Adoption")

    # Fisher’s Exact Test for barrier frequencies
    barrier_counts = df_clean[['barriers_High_Implementation_Costs', 'barriers_Lack_of_Skilled_Workforce',
                               'barriers_Integration_Issues', 'barriers_Resistance_to_Change']].sum()
    print("\nBarrier frequencies:")
    print(barrier_counts)

    # For each barrier, create a contingency table for Fisher's Exact Test or Chi-square
    results = []

    for barrier in ['barriers_High_Implementation_Costs', 'barriers_Lack_of_Skilled_Workforce',
                    'barriers_Integration_Issues', 'barriers_Resistance_to_Change']:
        if barrier in df_clean.columns:
            contingency = pd.crosstab(df_clean[barrier], df_clean['size_category'])

            # Check if the contingency table is 2x2 for Fisher's Exact Test
            if contingency.shape == (2, 2):
                # Perform Fisher's Exact test for 2x2 tables
                odds_ratio, p_val = stats.fisher_exact(contingency)
                test_type = "Fisher's Exact Test"
            else:
                # Perform Chi-square test for larger contingency tables
                chi2, p_val, dof, expected = stats.chi2_contingency(contingency)
                odds_ratio = None  # Not relevant for Chi-square test
                test_type = "Chi-square Test"

            print(f"\n{barrier.replace('barriers_', '').replace('_', ' ')}:")
            print(f"Test Type: {test_type}")
            print(f"p-value: {p_val:.4f}")
            if test_type == "Fisher's Exact Test":
                print(f"Odds Ratio: {odds_ratio:.2f}")
            print("-" * 80)

            # Store results for visualizations
            results.append({
                'Barrier': barrier.replace('barriers_', '').replace('_', ' '),
                'Test Type': test_type,
                'p-value': p_val,
                'Odds Ratio (if 2x2)': odds_ratio
            })

            # Visual Representation: Stacked Bar Chart for Contingency Table
            plt.figure(figsize=(8, 6))
            contingency.plot(kind='bar', stacked=True, color=['lightblue', 'lightcoral', 'lightgreen'])
            plt.title(f"Stacked Bar Chart: {barrier.replace('barriers_', '').replace('_', ' ')} vs Company Size")
            plt.xlabel('Barrier Presence (0 = No, 1 = Yes)')
            plt.ylabel('Count')
            plt.xticks(rotation=0)
            plt.legend(title='Company Size', labels=['SME', 'Medium', 'Large'])
            plt.show()

            # Heatmap for Contingency Table
            plt.figure(figsize=(8, 6))
            sns.heatmap(contingency, annot=True, fmt='d', cmap='Blues', cbar=False)
            plt.title(f"Heatmap: {barrier.replace('barriers_', '').replace('_', ' ')} vs Company Size")
            plt.xlabel('Company Size')
            plt.ylabel('Barrier Presence (0 = No, 1 = Yes)')
            plt.show()

    # Visual Representation: Bar Plot for P-values
    results_df = pd.DataFrame(results)

    plt.figure(figsize=(10, 6))
    plt.bar(results_df['Barrier'], results_df['p-value'], color='lightblue')
    plt.ylabel('p-value')
    plt.title('P-values for Barriers to Technology Adoption')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    # For Odds Ratios, if available (only for 2x2 tables)
    plt.figure(figsize=(10, 6))
    plt.bar(results_df['Barrier'], results_df['Odds Ratio (if 2x2)'].fillna(0), color='lightcoral')
    plt.ylabel('Odds Ratio')
    plt.title('Odds Ratios for Barriers to Technology Adoption')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


def test_h4b(df_clean, tech_cols):
    """H4b: Workforce Barrier Impact on Technology Adoption (Non-parametric version)"""
    print_header("H4b: Workforce Barrier Impact on Technology Adoption")

    # Define the workforce barrier and technology adoption columns
    workforce_barrier = 'barriers_Lack_of_Skilled_Workforce'
    tech_cols = [
        'adoption_Artificial_Intelligence_AI', 'adoption_Internet_of_Things_IoT',
        'adoption_Blockchain', 'adoption_Cloud_Computing', 'adoption_Robotic_and_Automation',
        'adoption_Big_Data_Analytics', 'adoption_Simulation', 'adoption_Other'
    ]

    # Loop through each technology to perform the Mann-Whitney U test and plot
    for tech in tech_cols:
        with_barrier = df_clean[df_clean[workforce_barrier] == 1][tech].dropna()
        without_barrier = df_clean[df_clean[workforce_barrier] == 0][tech].dropna()

        # Mann-Whitney U test for technology adoption with and without workforce barrier
        u_stat, p_val = stats.mannwhitneyu(with_barrier, without_barrier)
        print(f"\nMann-Whitney U test between with/without workforce barrier for {tech.replace('adoption_', '').replace('_', ' ').title()} Adoption:")
        print(f"  U={u_stat:.3f}, p-value={p_val:.4f}")

        # Visual Representation: Boxplot for technology adoption with and without workforce barrier
        plt.figure(figsize=(8, 6))
        df_clean.boxplot(column=tech, by=workforce_barrier, showmeans=True)
        plt.title(f'Boxplot: {tech.replace("adoption_", "").replace("_", " ").title()} Adoption by Workforce Barrier')
        plt.suptitle('')  # Remove default title generated by boxplot
        plt.xlabel('Workforce Barrier (0 = No, 1 = Yes)')
        plt.ylabel(f'{tech.replace("adoption_", "").replace("_", " ").title()} Adoption')
        plt.show()

        # Violin plot for technology adoption with and without workforce barrier (alternative to boxplot)
        plt.figure(figsize=(8, 6))
        sns.violinplot(x=workforce_barrier, y=tech, data=df_clean, inner="quart")
        plt.title(f'Violin Plot: {tech.replace("adoption_", "").replace("_", " ").title()} Adoption by Workforce Barrier')
        plt.xlabel('Workforce Barrier (0 = No, 1 = Yes)')
        plt.ylabel(f'{tech.replace("adoption_", "").replace("_", " ").title()} Adoption')
        plt.show()


def test_h5a(df_clean):
    """H5a: Companies that actively collaborate with suppliers and partners using digital platforms experience higher supply chain resilience compared to other technologies."""
    print_header("H5a: Collaboration Impact on Resilience")

    # Correlation between collaboration score and resilience score
    corr, p_val = stats.pearsonr(df_clean['collaboration_score'].dropna(), df_clean['resilience_score'].dropna())
    print(f"Pearson correlation between collaboration and resilience: r={corr:.3f}, p={p_val:.4f}")

    if p_val < 0.05:
        print("Significant positive relationship between collaboration and resilience.")
    else:
        print("No significant relationship between collaboration and resilience.")

    plt.figure(figsize=(8, 6))
    sns.regplot(x='collaboration_score', y='resilience_score', data=df_clean, scatter_kws={'color': 'blue'},
                line_kws={'color': 'red'})
    plt.title('Scatter Plot: Collaboration Score vs Resilience Score')
    plt.xlabel('Collaboration Score')
    plt.ylabel('Resilience Score')
    plt.show()

    # Pie chart for collaboration score distribution
    high_collaboration = df_clean[df_clean['collaboration_score'] > df_clean['collaboration_score'].median()]
    low_collaboration = df_clean[df_clean['collaboration_score'] <= df_clean['collaboration_score'].median()]

    # Create a pie chart for the distribution of high vs low collaboration
    collaboration_counts = [len(high_collaboration), len(low_collaboration)]
    labels = ['High Collaboration', 'Low Collaboration']

    plt.figure(figsize=(8, 8))
    plt.pie(collaboration_counts, labels=labels, autopct='%1.1f%%', startangle=90, colors=['lightblue', 'lightcoral'])
    plt.title('Pie Chart: Distribution of High vs Low Collaboration')
    plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    plt.show()

def test_h5b(df_clean):
    """H5b: The use of technologies strengthens collaboration and trust among stakeholders in the supply chain."""
    print_header("H5b: Technology Impact on Collaboration")

    # Correlation between technology adoption and collaboration
    corr, p_val = stats.pearsonr(df_clean['adoption_score'].dropna(), df_clean['collaboration_score'].dropna())
    print(f"Pearson correlation between technology adoption and collaboration: r={corr:.3f}, p={p_val:.4f}")

    if p_val < 0.05:
        print("Significant positive relationship between technology adoption and collaboration.")
    else:
        print("No significant relationship between technology adoption and collaboration.")

    plt.figure(figsize=(8, 6))
    sns.regplot(x='adoption_score', y='collaboration_score', data=df_clean, scatter_kws={'color': 'blue'},
                line_kws={'color': 'red'})
    plt.title('Scatter Plot: Technology Adoption vs Collaboration')
    plt.xlabel('Technology Adoption Score')
    plt.ylabel('Collaboration Score')
    plt.show()

    # Correlation matrix heatmap for adoption score and collaboration score
    correlation_matrix = df_clean[['adoption_score', 'collaboration_score']].corr()
    plt.figure(figsize=(8, 6))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', cbar=True)
    plt.title('Correlation Heatmap: Technology Adoption and Collaboration')
    plt.show()

    # Bar Plot for average collaboration score based on technology adoption (adopters vs non-adopters)
    adoption_threshold = df_clean['adoption_score'].median()  # Define threshold for adoption
    df_clean['adoption_group'] = df_clean['adoption_score'].apply(
        lambda x: 'Adopter' if x >= adoption_threshold else 'Non-Adopter')

    plt.figure(figsize=(8, 6))
    sns.barplot(x='adoption_group', y='collaboration_score', data=df_clean, palette='Blues')
    plt.title('Bar Plot: Collaboration Score by Technology Adoption')
    plt.xlabel('Adoption Group (Adopter vs Non-Adopter)')
    plt.ylabel('Collaboration Score')
    plt.show()

def test_h6a(df_clean):
    """H6a: Companies using real-time data analytics and AI-powered decision support systems make more effective and agile supply chain decisions during disruptions."""
    print_header("H6a: Real-time Analytics Impact")

    # Compare resilience for AI and Big Data users (using Mann-Whitney U test)
    print("\nImpact of AI and Big Data Analytics on resilience:")
    for tech in ['adoption_Artificial_Intelligence_AI', 'adoption_Big_Data_Analytics']:
        if tech in df_clean.columns:
            users = df_clean[df_clean[tech] == 1]['resilience_score'].mean()
            non_users = df_clean[df_clean[tech] == 0]['resilience_score'].mean()

            # Mann-Whitney U test (non-parametric alternative to t-test)
            u_stat, p_val = stats.mannwhitneyu(
                df_clean[df_clean[tech] == 1]['resilience_score'].dropna(),
                df_clean[df_clean[tech] == 0]['resilience_score'].dropna()
            )

            tech_name = tech.replace('adoption_', '').replace('_', ' ')
            print(f"\n{tech_name}:")
            print(f"Users mean resilience: {users:.2f}")
            print(f"Non-users mean resilience: {non_users:.2f}")
            print(f"Mann-Whitney U test: U={u_stat:.2f}, p={p_val:.4f}")

            plt.figure(figsize=(8, 6))
            df_clean.boxplot(column='resilience_score', by=tech, showmeans=True)
            plt.title(f'Boxplot: {tech_name} Adoption vs Resilience')
            plt.suptitle('')  # Remove default title generated by boxplot
            plt.xlabel(f'{tech_name} Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Resilience Score')
            plt.show()

            # Violin plot for users vs non-users of the technology
            plt.figure(figsize=(8, 6))
            sns.violinplot(x=tech, y='resilience_score', data=df_clean, inner="quart")
            plt.title(f'Violin Plot: {tech_name} Adoption vs Resilience')
            plt.xlabel(f'{tech_name} Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Resilience Score')
            plt.show()

            # Bar Plot for mean resilience scores (users vs non-users)
            plt.figure(figsize=(8, 6))
            mean_resilience = df_clean.groupby(tech)['resilience_score'].mean().reset_index()
            sns.barplot(x=tech, y='resilience_score', data=mean_resilience, palette='Blues')
            plt.title(f'Bar Plot: {tech_name} Adoption vs Mean Resilience')
            plt.xlabel(f'{tech_name} Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Mean Resilience Score')
            plt.show()

def test_h6b(df_clean):
    """H6b: Organizations that implement automated risk assessment tools report improved supply chain visibility and control."""
    print_header("H6b: Automated Tools Impact")

    # Compare resilience for IoT and Blockchain users (using Mann-Whitney U test)
    print("\nImpact of IoT and Blockchain on resilience:")
    for tech in ['adoption_Internet_of_Things_IoT', 'adoption_Blockchain']:
        if tech in df_clean.columns:
            users = df_clean[df_clean[tech] == 1]['resilience_score'].mean()
            non_users = df_clean[df_clean[tech] == 0]['resilience_score'].mean()

            # Mann-Whitney U test (non-parametric alternative to t-test)
            u_stat, p_val = stats.mannwhitneyu(
                df_clean[df_clean[tech] == 1]['resilience_score'].dropna(),
                df_clean[df_clean[tech] == 0]['resilience_score'].dropna()
            )

            tech_name = tech.replace('adoption_', '').replace('_', ' ')
            print(f"\n{tech_name}:")
            print(f"Users mean resilience: {users:.2f}")
            print(f"Non-users mean resilience: {non_users:.2f}")
            print(f"Mann-Whitney U test: U={u_stat:.2f}, p={p_val:.4f}")

            plt.figure(figsize=(8, 6))
            df_clean.boxplot(column='resilience_score', by=tech, showmeans=True)
            plt.title(f'Boxplot: {tech_name} Adoption vs Resilience')
            plt.suptitle('')  # Remove default title generated by boxplot
            plt.xlabel(f'{tech_name} Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Resilience Score')
            plt.show()

            # Violin plot for users vs non-users of the technology
            plt.figure(figsize=(8, 6))
            sns.violinplot(x=tech, y='resilience_score', data=df_clean, inner="quart")
            plt.title(f'Violin Plot: {tech_name} Adoption vs Resilience')
            plt.xlabel(f'{tech_name} Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Resilience Score')
            plt.show()

            # Bar Plot for mean resilience scores (users vs non-users)
            plt.figure(figsize=(8, 6))
            mean_resilience = df_clean.groupby(tech)['resilience_score'].mean().reset_index()
            sns.barplot(x=tech, y='resilience_score', data=mean_resilience, palette='Blues')
            plt.title(f'Bar Plot: {tech_name} Adoption vs Mean Resilience')
            plt.xlabel(f'{tech_name} Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Mean Resilience Score')
            plt.show()

def research_questions(df_clean, tech_cols):
    """Analyze and display results for research questions"""
    print_header("Research Questions Analysis")

    # RQ1: Disruptions and resilience characteristics
    print("\nRQ1: Which disturbances impact supply chains and characterize resilience?")
    disruption_cols = ['Disruption_Natural_Disasters', 'Disruption_Supplier_Failures',
                       'Disruption_Cybersecurity_Threats', 'Disruption_Global_Crises',
                       'Disruption_Transportation_Delays']
    disruption_counts = df_clean[disruption_cols].sum().sort_values(ascending=False)
    print("\nFrequency of disruptions:")
    print(disruption_counts.to_string())

    plt.figure(figsize=(10, 6))
    disruption_counts.plot(kind='bar', color='lightcoral')
    plt.title('Frequency of Disruptions Impacting Supply Chains')
    plt.xlabel('Disruption Type')
    plt.ylabel('Frequency')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    # RQ2: Technologies that help build resilience
    print("\nRQ2: Which technologies help build supply chain resilience?")
    eff_cols = ['effective_technology_Rank_1', 'effective_technology_Rank_2',
                'effective_technology_Rank_3', 'effective_technology_Rank_4',
                'effective_technology_Rank_5', 'effective_technology_Rank_6',
                'effective_technology_Rank_7']

    tech_effectiveness = pd.concat([df_clean[col].value_counts() for col in eff_cols], axis=1).fillna(0)
    tech_effectiveness['total'] = tech_effectiveness.sum(axis=1)
    tech_effectiveness = tech_effectiveness.sort_values('total', ascending=False)
    print("\nMost effective technologies for resilience:")
    print(tech_effectiveness['total'].head(10).to_string())

    plt.figure(figsize=(10, 6))
    tech_effectiveness['total'].head(10).plot(kind='bar', color='lightseagreen')
    plt.title('Top 10 Most Effective Technologies for Supply Chain Resilience')
    plt.xlabel('Technology')
    plt.ylabel('Total Votes (Effectiveness)')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


    # RQ3: Technology implementation in practice
    print("\nRQ3: Which technologies are actually used in practice?")
    adoption_rates = df_clean[tech_cols].mean().sort_values(ascending=False)
    print("\nTechnology adoption rates:")
    print(adoption_rates.to_string())

    plt.figure(figsize=(10, 6))
    adoption_rates.plot(kind='bar', color='dodgerblue')
    plt.title('Technology Adoption Rates in Practice')
    plt.xlabel('Technology')
    plt.ylabel('Adoption Rate')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


def research_objectives(df_clean):
    """Analyze and display results for research objectives"""
    print_header("Research Objectives Analysis")

    # Objective 1: Assess impact of emerging technologies
    print("\nObjective 1: Impact of emerging technologies on supply chain resilience")
    tech_impact = df_clean.groupby('adoption_score')['resilience_score'].mean()
    print("\nAverage resilience by number of technologies adopted:")
    print(tech_impact.to_string())

    plt.figure(figsize=(8, 6))
    tech_impact.plot(kind='line', marker='o', color='dodgerblue', linewidth=2, markersize=6)
    plt.title('Impact of Emerging Technologies on Supply Chain Resilience')
    plt.xlabel('Number of Technologies Adopted')
    plt.ylabel('Average Resilience Score')
    plt.grid(True)
    plt.xticks(range(int(tech_impact.index.min()), int(tech_impact.index.max()) + 1))  # Adjust x-ticks
    plt.tight_layout()
    plt.show()

    # Alternatively, a bar plot to visualize the impact
    plt.figure(figsize=(8, 6))
    tech_impact.plot(kind='bar', color='lightgreen')
    plt.title('Impact of Emerging Technologies on Supply Chain Resilience')
    plt.xlabel('Number of Technologies Adopted')
    plt.ylabel('Average Resilience Score')
    plt.xticks(rotation=0)  # No rotation for the x-axis labels
    plt.tight_layout()
    plt.show()


    # Objective 2: Identify industry-specific strategies
    print("\nObjective 2: Industry-specific strategies")
    industry_strategies = df_clean.groupby('Industry')[['response_score', 'collaboration_score']].mean()
    print("\nAverage response and collaboration scores by industry:")
    print(industry_strategies.to_string())

    industry_strategies.plot(kind='bar', figsize=(10, 6), color=['lightcoral', 'lightseagreen'])
    plt.title('Average Response and Collaboration Scores by Industry')
    plt.xlabel('Industry')
    plt.ylabel('Score')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    # Stacked Bar Plot for response_score and collaboration_score by industry
    industry_strategies.plot(kind='bar', stacked=True, figsize=(10, 6), color=['lightcoral', 'lightseagreen'])
    plt.title('Stacked Bar Plot: Response and Collaboration Scores by Industry')
    plt.xlabel('Industry')
    plt.ylabel('Score')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


    # Objective 3: Evaluate role of AI and predictive analytics
    print("\nObjective 3: Role of AI and predictive analytics")
    ai_users = df_clean[df_clean['adoption_Artificial_Intelligence_AI'] == 1]
    non_ai_users = df_clean[df_clean['adoption_Artificial_Intelligence_AI'] == 0]
    print(f"\nAI users mean resilience: {ai_users['resilience_score'].mean():.2f}")
    print(f"Non-AI users mean resilience: {non_ai_users['resilience_score'].mean():.2f}")

    plt.figure(figsize=(8, 6))
    df_clean.boxplot(column='resilience_score', by='adoption_Artificial_Intelligence_AI', showmeans=True)
    plt.title('Boxplot: Resilience Scores by AI Adoption')
    plt.suptitle('')  # Remove default title generated by boxplot
    plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Resilience Score')
    plt.show()

    # Violin plot comparing resilience scores for AI users vs non-users
    plt.figure(figsize=(8, 6))
    sns.violinplot(x='adoption_Artificial_Intelligence_AI', y='resilience_score', data=df_clean, inner="quart")
    plt.title('Violin Plot: Resilience Scores by AI Adoption')
    plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Resilience Score')
    plt.show()

    # Bar plot comparing the mean resilience scores for AI users vs non-users
    mean_resilience = df_clean.groupby('adoption_Artificial_Intelligence_AI')['resilience_score'].mean().reset_index()
    plt.figure(figsize=(8, 6))
    sns.barplot(x='adoption_Artificial_Intelligence_AI', y='resilience_score', data=mean_resilience, palette='Blues')
    plt.title('Bar Plot: Mean Resilience Scores by AI Adoption')
    plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
    plt.ylabel('Mean Resilience Score')
    plt.show()


def test_collaboration_integration_resilience(df_clean):
    """Objective 4: Understand the importance of collaboration and integration in enhancing supply chain resilience."""
    print_header("Objective 4: Collaboration and Integration Impact on Resilience")

    # Pearson correlation between collaboration score and resilience score
    corr, p_val = stats.pearsonr(df_clean['collaboration_score'].dropna(), df_clean['resilience_score'].dropna())
    print(f"Pearson correlation between collaboration and resilience: r={corr:.3f}, p={p_val:.4f}")

    if p_val < 0.05:
        print("Conclusion: Significant positive relationship between collaboration and resilience.")
    else:
        print("Conclusion: No significant relationship between collaboration and resilience.")

    plt.figure(figsize=(8, 6))
    sns.regplot(x='collaboration_score', y='resilience_score', data=df_clean, scatter_kws={'color': 'blue'},
                line_kws={'color': 'red'})
    plt.title('Scatter Plot: Collaboration Score vs Resilience Score')
    plt.xlabel('Collaboration Score')
    plt.ylabel('Resilience Score')
    plt.show()

    # Correlation matrix heatmap (if needed)
    correlation_matrix = df_clean[['collaboration_score', 'resilience_score']].corr()
    plt.figure(figsize=(8, 6))
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', cbar=True)
    plt.title('Correlation Heatmap: Collaboration and Resilience')
    plt.show()


def test_ai_bias_in_supply_chain(df_clean):
    """Objective 5: Explore the ethical implications and potential biases in AI-driven supply chain decisions."""
    print_header("Objective 5: Ethical Implications and Potential Biases in AI Decisions")

    # Mann-Whitney U test to compare resilience scores between AI adopters and non-adopters across industries
    print("\nMann-Whitney U test for AI adoption and resilience by industry:")
    for industry in df_clean['Industry'].unique():
        ai_adopters = df_clean[
            (df_clean['adoption_Artificial_Intelligence_AI'] == 1) & (df_clean['Industry'] == industry)]
        ai_non_adopters = df_clean[
            (df_clean['adoption_Artificial_Intelligence_AI'] == 0) & (df_clean['Industry'] == industry)]

        if len(ai_adopters) > 0 and len(ai_non_adopters) > 0:
            u_stat, p_val = stats.mannwhitneyu(ai_adopters['resilience_score'].dropna(),
                                               ai_non_adopters['resilience_score'].dropna())
            print(f"{industry}: U={u_stat:.3f}, p-value={p_val:.4f}")

            if p_val < 0.05:
                print(
                    f"Conclusion: Significant difference in resilience for AI adopters vs non-adopters in {industry}.")
            else:
                print(
                    f"Conclusion: No significant difference in resilience for AI adopters vs non-adopters in {industry}.")

            plt.figure(figsize=(10, 6))
            df_clean[df_clean['Industry'] == industry].boxplot(
                column='resilience_score', by='adoption_Artificial_Intelligence_AI', showmeans=True)
            plt.title(f'Boxplot: AI Adoption vs Resilience in {industry}')
            plt.suptitle('')  # Remove default title generated by boxplot
            plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Resilience Score')
            plt.show()

            # Violin plot for AI adoption vs non-adoption by industry
            plt.figure(figsize=(10, 6))
            sns.violinplot(x='adoption_Artificial_Intelligence_AI', y='resilience_score',
                           data=df_clean[df_clean['Industry'] == industry], inner="quart")
            plt.title(f'Violin Plot: AI Adoption vs Resilience in {industry}')
            plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Resilience Score')
            plt.show()

            # Bar plot for mean resilience scores for AI adopters vs non-adopters by industry
            mean_resilience = df_clean[df_clean['Industry'] == industry].groupby(
                'adoption_Artificial_Intelligence_AI')['resilience_score'].mean().reset_index()
            plt.figure(figsize=(8, 6))
            sns.barplot(x='adoption_Artificial_Intelligence_AI', y='resilience_score', data=mean_resilience,
                        palette='Blues')
            plt.title(f'Bar Plot: Mean Resilience by AI Adoption in {industry}')
            plt.xlabel('AI Adoption (0 = Non-Adopters, 1 = Adopters)')
            plt.ylabel('Mean Resilience Score')
            plt.show()


def test_technology_readiness_challenges(df_clean):
    """Objective 6: Determine the readiness and challenges for adopting these technologies across industries."""
    print_header("Objective 6: Readiness and Challenges for Adopting Technologies")

    # Chi-square test for technology adoption by industry
    print("\nChi-square test for technology adoption by industry:")
    for tech in ['adoption_Artificial_Intelligence_AI', 'adoption_Internet_of_Things_IoT',
                     'adoption_Blockchain', 'adoption_Cloud_Computing',
                     'adoption_Robotic_and_Automation', 'adoption_Big_Data_Analytics',
                     'adoption_Simulation', 'adoption_Other']:  # List of technologies
        contingency = pd.crosstab(df_clean['Industry'], df_clean[tech])

        chi2, p_val, dof, expected = stats.chi2_contingency(contingency)
        print(f"{tech.replace('adoption_', '').replace('_', ' ')}: Chi2={chi2:.3f}, p-value={p_val:.4f}")

        if p_val < 0.05:
            print(
                f"Conclusion: Significant relationship between technology adoption and industry for {tech.replace('adoption_', '').replace('_', ' ')}.")
        else:
            print(
                f"Conclusion: No significant relationship between technology adoption and industry for {tech.replace('adoption_', '').replace('_', ' ')}.")

        plt.figure(figsize=(10, 6))
        contingency.plot(kind='bar', stacked=True, color=['lightcoral', 'lightseagreen'], figsize=(10, 6))
        plt.title(f'Stacked Bar Plot: {tech.replace("adoption_", "").replace("_", " ").title()} Adoption by Industry')
        plt.xlabel('Industry')
        plt.ylabel('Count')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        # Heatmap for the Chi-square contingency table
        plt.figure(figsize=(10, 6))
        sns.heatmap(contingency, annot=True, cmap='Blues', fmt='d', cbar=True)
        plt.title(f'Heatmap: {tech.replace("adoption_", "").replace("_", " ").title()} Adoption by Industry')
        plt.xlabel('Technology Adoption')
        plt.ylabel('Industry')
        plt.tight_layout()
        plt.show()

        # Bar plot showing adoption counts for each technology in each industry
        plt.figure(figsize=(10, 6))
        adoption_counts = df_clean.groupby('Industry')[tech].sum()
        adoption_counts.plot(kind='bar', color='lightseagreen')
        plt.title(f'Bar Plot: Adoption Count of {tech.replace("adoption_", "").replace("_", " ").title()} by Industry')
        plt.xlabel('Industry')
        plt.ylabel('Adoption Count')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()


# Run all analyses
print("=" * 80)
print("SUPPLY CHAIN RESILIENCE ANALYSIS RESULTS")
print("=" * 80)

# Research questions and objectives
research_questions(df_clean, tech_cols)
research_objectives(df_clean)
test_collaboration_integration_resilience(df_clean)
test_ai_bias_in_supply_chain(df_clean)
test_technology_readiness_challenges(df_clean)

# Hypothesis tests
test_h1a(df_clean)
test_h1b(df_clean, tech_cols)
test_h2a(df_clean)
test_h2b(df_clean)
test_h3a(df_clean, tech_cols)
test_h3b(df_clean, tech_cols)
test_h4a(df_clean)
test_h4b(df_clean, tech_cols)
test_h5a(df_clean)
test_h5b(df_clean)
test_h6a(df_clean)
test_h6b(df_clean)





